### REPLICATION FILE -- Analysis
### Jonathan Homola, Natalie Jackson & Jeff Gill
### "A measure of survey mode differences"
### Electoral Studies 44: 255-274
### http://dx.doi.org/10.1016/j.electstud.2016.06.010

## clear environment, set seed, install/load packages
rm(list=ls())
set.seed(12435); options(stringsAsFactors=F)
#install.packages("xtable")
library("xtable")

## set working directory
#setwd(" ... ")

## read in the main dataset 
anes <- read.csv("HomolaJacksonGill_EntropyModeDifferences_Data.csv")
f2f <- subset(anes, mode==1)
inet <- subset(anes, mode==2)



####
#### Table 1 Comparing medians for face-to-face and internet modes
####
## calculate entropy measures - 2-3 items (first part)
f2fall <- inetall <- NULL
for (i in 1:856){
  if (!is.na(anes[5916,i])){
    if (anes[5916,i]==1){             ## change "level" here
      f2fall <- cbind(f2fall, f2f[,i])
      inetall <- cbind(inetall, inet[,i])
    }}}
f2fmedian <- apply(f2fall, 2, function(x) median(as.numeric(x), na.rm=T))
inetmedian <- apply(inetall, 2, function(x) median(as.numeric(x), na.rm=T))

## bootstrap
B <- 5000
f2fboot <- inetboot <- NULL
for (i in 1:B) {
  f2fboot <- cbind(f2fboot,sample(f2fmedian,length(f2fmedian),replace=TRUE))
  inetboot <- cbind(inetboot,sample(inetmedian,length(inetmedian),replace=TRUE))
  if(i%%1000==0) print(i)
}
f2f.mean <- mean(apply(f2fboot,1,mean))
f2f.se <- sqrt(var(apply(f2fboot,1,mean)))
inet.mean <- mean(apply(inetboot,1,mean))
inet.se <- sqrt(var(apply(inetboot,1,mean)))
results <- matrix(c(mean(apply(f2fboot,1,mean)), sqrt(var(apply(f2fboot,1,mean))),
                    mean(apply(inetboot,1,mean)), sqrt(var(apply(inetboot,1,mean)))), 
                  ncol=2, nrow=2, byrow=T)
colnames(results) <- c("BSM", "BSE")
rownames(results) <- c("f2f", "inet")
round(results, 3)
round(t.test(f2fboot, inetboot)$estimate[1] - t.test(f2fboot, inetboot)$estimate[2], 3)
round(t.test(f2fboot, inetboot)$statistic, 3)

## calculate entropy measures - 4-5 items (second part)
f2fall <- inetall <- NULL
for (i in 1:856){
  if (!is.na(anes[5916,i])){
    if (anes[5916,i]==2){             ## change "level" here
      f2fall <- cbind(f2fall, f2f[,i])
      inetall <- cbind(inetall, inet[,i])
    }}}
f2fmedian <- apply(f2fall, 2, function(x) median(as.numeric(x), na.rm=T))
inetmedian <- apply(inetall, 2, function(x) median(as.numeric(x), na.rm=T))

## bootstrap
B <- 5000
f2fboot <- inetboot <- NULL
for (i in 1:B) {
  f2fboot <- cbind(f2fboot,sample(f2fmedian,length(f2fmedian),replace=TRUE))
  inetboot <- cbind(inetboot,sample(inetmedian,length(inetmedian),replace=TRUE))
  if(i%%1000==0) print(i)
}
f2f.mean <- mean(apply(f2fboot,1,mean))
f2f.se <- sqrt(var(apply(f2fboot,1,mean)))
inet.mean <- mean(apply(inetboot,1,mean))
inet.se <- sqrt(var(apply(inetboot,1,mean)))
results <- matrix(c(mean(apply(f2fboot,1,mean)), sqrt(var(apply(f2fboot,1,mean))),
                    mean(apply(inetboot,1,mean)), sqrt(var(apply(inetboot,1,mean)))), 
                  ncol=2, nrow=2, byrow=T)
colnames(results) <- c("BSM", "BSE")
rownames(results) <- c("f2f", "inet")
round(results, 3)
round(t.test(f2fboot, inetboot)$estimate[1] - t.test(f2fboot, inetboot)$estimate[2], 3)
round(t.test(f2fboot, inetboot)$statistic, 3)

## calculate entropy measures - 7-11 items (third part)
f2fall <- inetall <- NULL
for (i in 1:856){
  if (!is.na(anes[5916,i])){
    if (anes[5916,i]==3){             ## change "level" here
      f2fall <- cbind(f2fall, f2f[,i])
      inetall <- cbind(inetall, inet[,i])
    }}}
f2fmedian <- apply(f2fall, 2, function(x) median(as.numeric(x), na.rm=T))
inetmedian <- apply(inetall, 2, function(x) median(as.numeric(x), na.rm=T))

## bootstrap
B <- 5000
f2fboot <- inetboot <- NULL
for (i in 1:B) {
  f2fboot <- cbind(f2fboot,sample(f2fmedian,length(f2fmedian),replace=TRUE))
  inetboot <- cbind(inetboot,sample(inetmedian,length(inetmedian),replace=TRUE))
  if(i%%1000==0) print(i)
}
f2f.mean <- mean(apply(f2fboot,1,mean))
f2f.se <- sqrt(var(apply(f2fboot,1,mean)))
inet.mean <- mean(apply(inetboot,1,mean))
inet.se <- sqrt(var(apply(inetboot,1,mean)))
results <- matrix(c(mean(apply(f2fboot,1,mean)), sqrt(var(apply(f2fboot,1,mean))),
                    mean(apply(inetboot,1,mean)), sqrt(var(apply(inetboot,1,mean)))), 
                  ncol=2, nrow=2, byrow=T)
colnames(results) <- c("BSM", "BSE")
rownames(results) <- c("f2f", "inet")
round(results, 3)
round(t.test(f2fboot, inetboot)$estimate[1] - t.test(f2fboot, inetboot)$estimate[2], 3)
round(t.test(f2fboot, inetboot)$statistic, 3)

## calculate entropy measures - 101 items (fourth part)
f2fall <- inetall <- NULL
for (i in 1:856){
  if (!is.na(anes[5916,i])){
    if (anes[5916,i]==4){             ## change "level" here
      f2fall <- cbind(f2fall, f2f[,i])
      inetall <- cbind(inetall, inet[,i])
    }}}
f2fmedian <- apply(f2fall, 2, function(x) median(as.numeric(x), na.rm=T))
inetmedian <- apply(inetall, 2, function(x) median(as.numeric(x), na.rm=T))

## bootstrap
B <- 5000
f2fboot <- inetboot <- NULL
for (i in 1:B) {
  f2fboot <- cbind(f2fboot,sample(f2fmedian,length(f2fmedian),replace=TRUE))
  inetboot <- cbind(inetboot,sample(inetmedian,length(inetmedian),replace=TRUE))
  if(i%%1000==0) print(i)
}
f2f.mean <- mean(apply(f2fboot,1,mean))
f2f.se <- sqrt(var(apply(f2fboot,1,mean)))
inet.mean <- mean(apply(inetboot,1,mean))
inet.se <- sqrt(var(apply(inetboot,1,mean)))
results <- matrix(c(mean(apply(f2fboot,1,mean)), sqrt(var(apply(f2fboot,1,mean))),
                    mean(apply(inetboot,1,mean)), sqrt(var(apply(inetboot,1,mean)))), 
                  ncol=2, nrow=2, byrow=T)
colnames(results) <- c("BSM", "BSE")
rownames(results) <- c("f2f", "inet")
round(results, 3)
round(t.test(f2fboot, inetboot)$estimate[1] - t.test(f2fboot, inetboot)$estimate[2], 3)
round(t.test(f2fboot, inetboot)$statistic, 3)



####
#### Table 2 Entropy descriptive statistics
####

## calculate entropy measures - 2-3 items (first part)
f2f <- subset(anes, mode==1)
inet <- subset(anes, mode==2)
f2f_entrop <- inet_entrop <- names <- NULL
for (i in 1:856){
  if (!is.na(anes[5916,i])){
    if (anes[5916,i] == 1){   # change "level" here
      names <- c(names, colnames(anes[i]))
      p1 <- p2 <- NULL
      h1 <- h2 <- 0
      for (j in 1:length(as.matrix(table(f2f[,i])))){
        p1 <- (as.matrix(table(f2f[,i]))[j])/(sum(as.matrix(table(f2f[,i]))))
        h1 <- h1 + p1 * log(p1)
      }
      for (j in 1:length(as.matrix(table(inet[,i])))){
        p2 <- (as.matrix(table(inet[,i]))[j])/(sum(as.matrix(table(inet[,i]))))
        h2 <- h2 + p2 * log(p2)
      }
      f2f_entrop <- c(f2f_entrop, h1)
      inet_entrop <- c(inet_entrop, h2)
    }}}

f2f_entrop <- -f2f_entrop
inet_entrop <- -inet_entrop

## display entropy results - 2-3 items (first part)
results <- matrix(c(mean(f2f_entrop), var(f2f_entrop), min(f2f_entrop), max(f2f_entrop),
                    mean(inet_entrop), var(inet_entrop), min(inet_entrop), max(inet_entrop)), 
                  ncol=4, nrow=2, byrow=T)
colnames(results) <- c("mean", "var", "min", "max")
rownames(results) <- c("f2f", "inet")
round(results, 3)
round(-t.test(f2f_entrop, inet_entrop)$statistic, 3)

inet1 <- inet_entrop
f2f1 <- f2f_entrop
names(inet1) <- names(f2f1) <- names


## calculate entropy measures - 4-5 items (second part)
f2f <- subset(anes, mode==1)
inet <- subset(anes, mode==2)
f2f_entrop <- inet_entrop <- names <- NULL
for (i in 1:856){
  if (!is.na(anes[5916,i])){
    if (anes[5916,i] == 2){   # change "level" here
      names <- c(names, colnames(anes[i]))
      p1 <- p2 <- NULL
      h1 <- h2 <- 0
      for (j in 1:length(as.matrix(table(f2f[,i])))){
        p1 <- (as.matrix(table(f2f[,i]))[j])/(sum(as.matrix(table(f2f[,i]))))
        h1 <- h1 + p1 * log(p1)
      }
      for (j in 1:length(as.matrix(table(inet[,i])))){
        p2 <- (as.matrix(table(inet[,i]))[j])/(sum(as.matrix(table(inet[,i]))))
        h2 <- h2 + p2 * log(p2)
      }
      f2f_entrop <- c(f2f_entrop, h1)
      inet_entrop <- c(inet_entrop, h2)
    }}}

f2f_entrop <- -f2f_entrop
inet_entrop <- -inet_entrop

## display entropy results - 4-5 items (second part)
results <- matrix(c(mean(f2f_entrop), var(f2f_entrop), min(f2f_entrop), max(f2f_entrop),
                    mean(inet_entrop), var(inet_entrop), min(inet_entrop), max(inet_entrop)), 
                  ncol=4, nrow=2, byrow=T)
colnames(results) <- c("mean", "var", "min", "max")
rownames(results) <- c("f2f", "inet")
round(results, 3)
round(-t.test(f2f_entrop, inet_entrop)$statistic, 3)

inet2 <- inet_entrop
f2f2 <- f2f_entrop
names(inet2) <- names(f2f2) <- names


## calculate entropy measures - 7-10 items (third part)
f2f <- subset(anes, mode==1)
inet <- subset(anes, mode==2)
f2f_entrop <- inet_entrop <- names <- NULL
for (i in 1:856){
  if (!is.na(anes[5916,i])){
    if (anes[5916,i] == 3){   # change "level" here
      names <- c(names, colnames(anes[i]))
      p1 <- p2 <- NULL
      h1 <- h2 <- 0
      for (j in 1:length(as.matrix(table(f2f[,i])))){
        p1 <- (as.matrix(table(f2f[,i]))[j])/(sum(as.matrix(table(f2f[,i]))))
        h1 <- h1 + p1 * log(p1)
      }
      for (j in 1:length(as.matrix(table(inet[,i])))){
        p2 <- (as.matrix(table(inet[,i]))[j])/(sum(as.matrix(table(inet[,i]))))
        h2 <- h2 + p2 * log(p2)
      }
      f2f_entrop <- c(f2f_entrop, h1)
      inet_entrop <- c(inet_entrop, h2)
    }}}

f2f_entrop <- -f2f_entrop
inet_entrop <- -inet_entrop

## display entropy results - 7-10 items (third part)
results <- matrix(c(mean(f2f_entrop), var(f2f_entrop), min(f2f_entrop), max(f2f_entrop),
                    mean(inet_entrop), var(inet_entrop), min(inet_entrop), max(inet_entrop)), 
                  ncol=4, nrow=2, byrow=T)
colnames(results) <- c("mean", "var", "min", "max")
rownames(results) <- c("f2f", "inet")
round(results, 3)
round(-t.test(f2f_entrop, inet_entrop)$statistic, 3)

inet3 <- inet_entrop
f2f3 <- f2f_entrop
names(inet3) <- names(f2f3) <- names


## calculate entropy measures - 101 items (fourth part)
f2f <- subset(anes, mode==1)
inet <- subset(anes, mode==2)
f2f_entrop <- inet_entrop <- names <- NULL
for (i in 1:856){
  if (!is.na(anes[5916,i])){
    if (anes[5916,i] == 4){   # change "level" here
      names <- c(names, colnames(anes[i]))
      p1 <- p2 <- NULL
      h1 <- h2 <- 0
      for (j in 1:length(as.matrix(table(f2f[,i])))){
        p1 <- (as.matrix(table(f2f[,i]))[j])/(sum(as.matrix(table(f2f[,i]))))
        h1 <- h1 + p1 * log(p1)
      }
      for (j in 1:length(as.matrix(table(inet[,i])))){
        p2 <- (as.matrix(table(inet[,i]))[j])/(sum(as.matrix(table(inet[,i]))))
        h2 <- h2 + p2 * log(p2)
      }
      f2f_entrop <- c(f2f_entrop, h1)
      inet_entrop <- c(inet_entrop, h2)
    }}}

f2f_entrop <- -f2f_entrop
inet_entrop <- -inet_entrop

## display entropy results - 101 items (fourth part)
results <- matrix(c(mean(f2f_entrop), var(f2f_entrop), min(f2f_entrop), max(f2f_entrop),
                    mean(inet_entrop), var(inet_entrop), min(inet_entrop), max(inet_entrop)), 
                  ncol=4, nrow=2, byrow=T)
colnames(results) <- c("mean", "var", "min", "max")
rownames(results) <- c("f2f", "inet")
round(results, 3)
round(-t.test(f2f_entrop, inet_entrop)$statistic, 3)

inet4 <- inet_entrop
f2f4 <- f2f_entrop
names(inet4) <- names(f2f4) <- names



####
#### Calculate mode differences in entropy
####
diff1 <- inet1-f2f1
diff2 <- inet2-f2f2
diff3 <- inet3-f2f3
diff4 <- inet4-f2f4




####
#### Table 3 Greatest face-to-face entropies
####
xtable(t(sort(f2f1)[228:232]), digits=4)
xtable(t(sort(f2f2)[151:155]), digits=4)
xtable(t(sort(f2f3)[74:78]), digits=4)
xtable(t(sort(f2f4)[45:49]), digits=4)



####
#### Table 4 Greatest internet entropies
####
xtable(t(sort(inet1)[228:232]), digits=4)
xtable(t(sort(inet2)[151:155]), digits=4)
xtable(t(sort(inet3)[74:78]), digits=4)
xtable(t(sort(inet4)[45:49]), digits=4)



####
#### Table 5 Greatest entropy differences
####
xtable(t(sort(diff1)[3:1]), digits=4)
xtable(t(sort(diff1)[230:232]), digits=4)
xtable(t(sort(diff2)[3:1]), digits=4)
xtable(t(sort(diff2)[153:155]), digits=4)
xtable(t(sort(diff3)[3:1]), digits=4)
xtable(t(sort(diff3)[76:78]), digits=4)
xtable(t(sort(diff4)[3:1]), digits=4)
xtable(t(sort(diff4)[46:49]), digits=4)



####
#### Figure 3 Entropy histograms
####
#pdf("Fig3.pdf",height=7,width=6.125)
par(mfrow=c(4,2))
hist(f2f1, freq=F, xlim=c(0.25,2.8), ylim=c(0,3), cex.lab=0.85, cex.main=1.1,
     xlab="F2F Entropy", main="2 and 3 point items")
hist(inet1, freq=F, xlim=c(0.25,2.8), ylim=c(0,3), cex.lab=0.85, cex.main=1.1,
     xlab="Internet Entropy", main="k = 232")
hist(f2f2, freq=F, xlim=c(0.25,2.8), ylim=c(0,3), cex.lab=0.85, cex.main=1.1,
     xlab="F2F Entropy", main="4 and 5 point items")
hist(inet2, freq=F, xlim=c(0.25,2.8), ylim=c(0,3), cex.lab=0.85, cex.main=1.1,
     xlab="Internet Entropy", main="k = 155")
hist(f2f3, freq=F, xlim=c(0.25,2.8), ylim=c(0,3), cex.lab=0.85, cex.main=1.1,
     xlab="F2F Entropy", main="7 to 11 point items")
hist(inet3, freq=F, xlim=c(0.25,2.8), ylim=c(0,3), cex.lab=0.85, cex.main=1.1,
     xlab="Internet Entropy", main="k = 78")
hist(f2f4, freq=F, xlim=c(0.25,2.8), ylim=c(0,3), cex.lab=0.85, cex.main=1.1,
     xlab="F2F Entropy", main="101 point items")
hist(inet4, freq=F, xlim=c(0.25,2.8), ylim=c(0,3), cex.lab=0.85, cex.main=1.1,
     xlab="Internet Entropy", main="k = 49")
par(mfrow=c(1,1))
#dev.off()



####
#### Figure 4 Distribution of entropy differences (Internet - F2F)
####
#pdf("Fig4.pdf", height=8,width=7)
par(mfrow=c(2,2), mar=c(3.6, 3.1, 2.6, 2.1))
hist(diff1, prob=T, xlim=c(-0.6,0.8), ylim=c(0,5.7), cex.main=0.8, 
     main="2 and 3 point items", xaxt="n", yaxt="n", xlab="", ylab="")
lines(density(diff1))
axis(side=1, cex.axis=0.7)
mtext("k = 232", side=1, line=2.3, cex=0.7)
axis(side=2, cex.axis=0.7)
mtext("Density", side=2, line=2, cex=0.7)
abline(v=0)
hist(diff2, prob=T, xlim=c(-0.6,0.8), ylim=c(0,5.7), cex.main=0.8,
     main="4 and 5 point items", xaxt="n", yaxt="n", xlab="", ylab="")
lines(density(diff2))
axis(side=1, cex.axis=0.7)
mtext("k = 155", side=1, line=2.3, cex=0.7)
axis(side=2, cex.axis=0.7)
mtext("Density", side=2, line=2, cex=0.7)
abline(v=0)   
hist(diff3, prob=T, xlim=c(-0.6,0.8), ylim=c(0,5.7), cex.main=0.8,
     main="7 to 11 point items", xaxt="n", yaxt="n", xlab="", ylab="")
lines(density(diff3))
axis(side=1, cex.axis=0.7)
mtext("k = 78", side=1, line=2.3, cex=0.7)
axis(side=2, cex.axis=0.7)
mtext("Density", side=2, line=2, cex=0.7)
abline(v=0) 
hist(diff4, prob=T, xlim=c(-0.6,0.8), ylim=c(0,5.7), cex.main=0.8,
     main="101 point items", xaxt="n", yaxt="n", xlab="", ylab="")
lines(density(diff4))
axis(side=1, cex.axis=0.7)
mtext("k = 49", side=1, line=2.3, cex=0.7)
axis(side=2, cex.axis=0.7)
mtext("Density", side=2, line=2, cex=0.7)
abline(v=0) 
par(mfrow=c(1,1))
par(mar=c(5.1, 4.1, 4.1, 2.1))
#dev.off()